################################################################################
##
## Purpose: This script creates SI Figures 23 and 24
##
## Author: James Bisbee (james.h.bisbee@vanderbilt.edu)
##
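## Input Files:
##  - ./data/prepped/finalData.RData
##  - ./output/chatGPT_polite_BTM_aggression_cleaned.RData
##
## Output Files:
##  - ./output/figures/SI_figure_23.pdf
##  - ./output/figures/SI_figure_24.pdf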
##
##
## See associated log file for compute environment, package versions, 
##  and date of most recent run.
# Start from an empty global environment so that objects left over from a
# previous script cannot leak into this one.
rm(list = ls())
gc()

# Attach dependencies with library() rather than require(): library() stops
# immediately when a package is missing, whereas require() only warns and
# returns FALSE, which defers the failure to the first call that needs it.
library(tidyverse)
library(ggridges)
library(openai)
library(BradleyTerry2)
library(fixest)
library(qvcalc)

# Fix the RNG seed for reproducibility.
set.seed(123)

# Compute details
# Prints a short description of the machine running the script so that it ends
# up in the log: RAM, CPU vendor, CPU model and core count on Windows, or the
# CPU / memory use of any `rsession` processes on Linux.
print(paste0('Compute environment from ',Sys.Date(),' run by Bisbee'))

# identical() (rather than ==) keeps the test a clean TRUE/FALSE even if
# Sys.info() does not report a sysname on this platform.
os_name <- unname(Sys.info()['sysname'])

if (identical(os_name, 'Windows')) {
  # wmic returns a header line first; [-1] / [2] skip it.
  ram_size <- system("wmic MemoryChip get Capacity", intern = TRUE)[-1]
  model_name <- system("wmic cpu get name", intern = TRUE)[2] # nocov
  vendor_id <- system("wmic cpu get manufacturer", intern = TRUE)[2] # nocov
  
  print(list(ram = stringr::str_squish(ram_size)[1],
             vendor_id = stringr::str_squish(vendor_id),
             model_name = stringr::str_squish(model_name),
             no_of_cores = parallel::detectCores()))
} else if (identical(os_name, 'Linux')) {
  # Was 'Linuxs', which never matches Sys.info() and made this branch
  # unreachable.
  # One line per rsession process, split on single spaces. The field positions
  # used below depend on how ps pads its columns.
  # NOTE(review): confirm the positions on the target system.
  splitted <- strsplit(system("ps -C rsession -o %cpu,%mem,pid,cmd", intern = TRUE), " ")
  # splitted[-1] drops the ps header row.
  df <- do.call(rbind, lapply(splitted[-1], 
                              function(x) data.frame(
                                cpu = as.numeric(x[2]),
                                mem = as.numeric(x[4]),
                                pid = as.numeric(x[5]),
                                cmd = paste(x[-c(1:5)], collapse = " "))))
  # Print explicitly so the table also appears when the script is source()d.
  print(df)
} else {
  cat("If not on Linux or Windows, you'll have to figure out your own solution to seeing the compute environment.")
}

# Restore the prepared data objects into the global environment.
# NOTE(review): within this script only the commented-out reference code below
# appears to use them (e.g. `utterance_level`) -- confirm before removing.
load('./data/prepped/finalData.RData')


# The OpenAI key is only needed by the commented-out reference code below.
# Set it only when a key has actually been entered, so that running this
# script does not overwrite an OPENAI_API_KEY already present in the
# environment with an empty string.
openai_key <- '' # Enter OpenAI key here
if (nzchar(openai_key)) {
  Sys.setenv(OPENAI_API_KEY = openai_key)
}

# The following code is included for reference, but is not part of the replication materials.

# create_prompt <- function(chunk,direction = 'more') {
#   res <- list(
#     list(
#       "role" = "system",
#       "content" = "An aggressive communication style is a way of communicating with others than involves assertiveness, dominance, bluntness, verbal attacks, ignoring boundaires, hostility, lack of empathy, manipulation, and defensiveness."
#     ),
#     list(
#       "role" = "user",
#       "content" = stringr::str_c(
#         'Please read the following conversations between a chair of the Federal Reserve and a member of Congress. Out of the two examples, which conversation is ',
#         direction,' aggressive overall? Within the selected conversation, which speaker is more aggressive?\n\n',
#         chunk)
#     )
#   )
#   return(res)
# }
# 
# submit_openai <- function(prompt, temperature = 0.2, n = 1) {
#   res <- openai::create_chat_completion(model = "gpt-3.5-turbo",
#                                         messages = prompt,
#                                         temperature = temperature,
#                                         n = n)
#   Sys.sleep(1)
#   res
# }
# 
# 
# # Can we do this pairwise?
# toSample <- utterance_level %>%
#   arrange(docID,ind) %>%
#   select(docID,chamber,date,speaker,opensecretsID,ind,nchars,textclean) %>%
#   group_by(docID) %>%
#   mutate(firstFED = ifelse(grepl("FED",opensecretsID),ind,NA)) %>%
#   mutate(firstFED = min(firstFED,na.rm=T)) %>%
#   filter(ind >= firstFED) %>%
#   arrange(docID,ind) %>%
#   select(docID,chamber,date,ind,speaker,textclean,opensecretsID) %>%
#   mutate(delta = ind - lag(ind)) %>%
#   filter(delta == 1) %>%
#   mutate(delta2 = ind - lag(ind)) %>%
#   mutate(delta2 = ifelse(is.na(delta2),1,delta2)) %>%
#   mutate(chunkIndicator = cumsum(speaker != lag(speaker,2,default = speaker[1]))) %>%
#   mutate(chunkIndicator = ifelse(chunkIndicator == (lead(chunkIndicator) - 1), lead(chunkIndicator),chunkIndicator)) %>%
#   mutate(chunkIndicator = ifelse(chunkIndicator == (lag(chunkIndicator) + 1) & (chunkIndicator == lead(chunkIndicator) - 1), 
#                                  lag(chunkIndicator),chunkIndicator)) %>%
#   group_by(docID,chunkIndicator) %>%
#   mutate(n = n()) %>%
#   ungroup() %>%
#   mutate(nchars = nchar(textclean)) %>%
#   filter(n > 2) %>%
#   mutate(textclean = paste0(gsub('\\.$','',speaker),': ',textclean)) %>%
#   group_by(docID,chamber,date,chunkIndicator,n) %>%
#   summarise(text = paste(textclean,collapse = '\n')) %>%
#   ungroup() %>%
#   mutate(fed = ifelse(date < as.Date('2006-01-01'),'Greenspan',
#                       ifelse(date < as.Date('2014-01-01'),'Bernanke',
#                              ifelse(date < as.Date('2018-01-01'),'Yellen','Powell')))) %>%
#   mutate(nchars = nchar(text)) %>%
#   rowwise() %>%
#   filter(grepl(fed,text)) %>%
#   ungroup()
# 
# set.seed(123)
# chunks <- list()
# counter <- 1
# for(nConv in c(3:12)) {
#   qntls <- quantile(toSample %>%
#                       filter(n == nConv) %>% 
#                       pull(nchars))
#   
#   for(q in 2:length(qntls)) {
#     tmp <- toSample %>%
#       filter(n == nConv,
#              nchars < qntls[q],
#              nchars > qntls[q-1])
#     
#     for(i in 1:50) {
#       tmp2 <- tmp %>%
#         group_by(fed) %>%
#         sample_n(size = 1) %>%
#         ungroup() %>%
#         sample_n(size = 2) %>%
#         slice(sample(1:2,2))
#       
#       chunks[[counter]] <- list()
#       chunks[[counter]]$chunk <- paste(paste0('Conversation ',1:2,':\n',
#                                               tmp2 %>% pull(text),collapse = '\n\n'))
#       chunks[[counter]]$srcs <- tmp2
#       counter <- counter + 1
#     }
#   }
# }
# 
# test <- NULL
# for(i in 1:length(chunks)) {
#   test <- test %>%
#     bind_rows(chunks[[i]]$srcs %>%
#                 mutate(index = i,
#                        rown = row_number()))
# }
# 
# 
# res <- NULL
# for(i in 1:length(chunks)) {
#   cat('----------------------------\n',i,'\n----------------------------\n')
#   for(d in c('more','less')) {
#     for(rev in c(T,F)) {
#       if(rev) {
#         torev <- str_split(chunks[[i]]$chunk,pattern = '(\n\nConversation 2:)')[[1]]
#         chnk <- paste(paste0('Conversation 1:',torev[2]),
#                       gsub('Conversation 1','Conversation 2',torev[1]),sep = '\n\n')
#         srcs <- chunks[[i]]$srcs %>% slice(2,1)
#       } else {
#         chnk <- chunks[[i]]$chunk
#         srcs <- chunks[[i]]$srcs
#       }
#       
#       prompts <- create_prompt(chunk = chnk,direction = d)
#       if(nchar(prompts[[2]]$content) > 10000) { next }
#       
#       Sys.sleep(2)
#       system.time(openai_completions <- try(submit_openai(prompt = prompts,temperature = 0,n = 1)))
#       
#       while(class(openai_completions) == 'try-error') {
#         Sys.sleep(5)
#         system.time(openai_completions <- try(submit_openai(prompt = prompts,temperature = 0,n = 1)))
#       }
# 
#       res <- res %>%
#         bind_rows(srcs %>%
#                     select(-docID,-chunkIndicator) %>%
#                     mutate(id = row_number()) %>%
#                     pivot_wider(names_from = id,values_from = c('date','n','text','fed','nchars','chamber')) %>%
#                     mutate(explanation = openai_completions$choices$message.content,
#                            direction = d,
#                            reversed = rev))
#     }
#   }
# }
# 
# save(res,file = './output/chatGPT_polite_BTM_aggression.RData')
# 
# 
# rm(list = ls())
# load('./output/chatGPT_polite_BTM_aggression.RData')
# 
# 
# create_prompt <- function(chunk) {
#   res <- list(
#     list(
#       "role" = "system",
#       "content" = "You are a helpful AI assistant."
#     ),
#     list(
#       "role" = "user",
#       "content" = stringr::str_c(
#         'The following is a summary assessment of which of two conversations contains more aggressive language. 
#         Please extract just the number of the conversation that is deemed to be more aggressive.
#         If there is not enough information provide, return a -1. Do not return any text in your response.\n"',
#         chunk,'"')
#     )
#   )
#   return(res)
# }
# 
# submit_openai <- function(prompt, temperature = 0, n = 1) {
#   res <- openai::create_chat_completion(model = "gpt-3.5-turbo",
#                                         messages = prompt,
#                                         temperature = temperature,
#                                         n = n)
#   Sys.sleep(1)
#   res
# }
# 
# cleanRes <- list()
# res$cleaned <- NA
# for(i in which(is.na(res$cleaned))) {
#   Sys.sleep(2)
#   cat('----------------------------\n',i,'\n----------------------------\n')
#   prompts <- create_prompt(res %>% slice(i) %>% pull(explanation))
#   
#   system.time(openai_completions <- try(submit_openai(prompt = prompts,temperature = 0,n = 1)))
#   while(class(openai_completions) == 'try-error') {
#     Sys.sleep(5)
#     system.time(openai_completions <- try(submit_openai(prompt = prompts,temperature = 0,n = 1)))
#   }
#   cleanRes[[i]] <- openai_completions$choices$message.content
#   res$cleaned[i] <- cleanRes[[i]]
# }
# 
# save(res,file = './output/chatGPT_polite_BTM_aggression_cleaned.RData')


# Bring in `res`: one row per submitted comparison, with the model's extracted
# answer stored as text in `cleaned`.
load('./output/chatGPT_polite_BTM_aggression_cleaned.RData')

# Convert the answer to a number (first optionally-negative digit found) and
# fold a 0 into the -1 "no answer" code.
res <- res %>%
  mutate(cleaned = as.numeric(str_extract(cleaned,'(-)*\\d')),
         cleaned = ifelse(cleaned == 0,-1,cleaned))

# Counts of (more aggressive chair, less aggressive chair, length bin) among
# answered, non-reversed "which is more aggressive" comparisons.
tab <- res %>%
  filter(cleaned != -1,
         direction == 'more',
         !reversed) %>%
  mutate(
    # TRUE when conversation 1 is the more aggressive one of the pair.
    firstIsMore = (cleaned == 1 & direction == 'more') |
      (cleaned == 2 & direction == 'less'),
    moreAgg = ifelse(firstIsMore,fed_1,fed_2),
    lessAgg = ifelse(firstIsMore,fed_2,fed_1),
    # Bin by the number of utterances in conversation 1: <5, <7, <9, else.
    length = c('3-4','5-6','7-8','9+')[findInterval(n_1,c(5,7,9)) + 1]
  ) %>%
  count(moreAgg,lessAgg,length)

# SI Figure 23: heat map of pairwise aggression comparisons between Fed chairs.
# Each tile shows, within a chair dyad and conversation-length facet, the share
# of comparisons in which the y-axis chair's conversation was judged more
# aggressive than the x-axis chair's.
chairLevels <- c('Bernanke','Greenspan','Powell','Yellen')

# The plot is built before the device is opened, so an error in the data
# pipeline cannot leave a dangling PDF device.
fig23 <- tab %>%
  # Order-independent dyad key: (A, B) and (B, A) get the same ID, so `tot` is
  # the number of comparisons between the two chairs in a given length bin.
  mutate(dyadID = paste(pmin(moreAgg,lessAgg),pmax(moreAgg,lessAgg),sep = '_')) %>%
  group_by(dyadID,length) %>%
  mutate(tot = sum(n)) %>%
  ungroup() %>%
  mutate(share = n / tot) %>%
  select(moreAgg,lessAgg,length,share) %>%
  # x-axis levels are reversed relative to the y-axis levels.
  mutate(lessAgg = factor(lessAgg,levels = rev(chairLevels)),
         moreAgg = factor(moreAgg,levels = chairLevels)) %>%
  ggplot(aes(x = lessAgg,y = moreAgg,fill = share)) + 
  geom_tile() + 
  geom_text(aes(label = paste0(round(share*100,0),'%')),size =3.5) + 
  scale_fill_gradient2(midpoint = .5,low = 'darkred',mid = 'white',high = 'darkgreen') + 
  theme_bw()+
  labs(x = 'Less aggressive conversations',
       y = 'More aggressive conversations',
       fill = 'Proportion of\ncomparisons',
       title = 'AI-annotated aggression',
       subtitle = 'Proportion of comparisons (y-axis to x-axis)\nthat are more aggressive by conversation length (facets)') + 
  facet_grid(~length) + 
  theme(legend.position = 'none',
        axis.text.x = element_text(angle = 45,hjust = 1))

pdf('./output/figures/SI_figure_23.pdf',width = 8,height = 4)
# Explicit print(): ggplot objects are only auto-printed at top level in
# interactive / Rscript / R CMD BATCH runs; under source() the PDF would
# otherwise be written without the plot.
print(fig23)
dev.off()


# BT WORK
# Fits a Bradley-Terry model to the pairwise "which conversation is more
# aggressive" answers and derives quasi-variances for the chair abilities.

# Map a calendar year to the Fed chair label used for that period.
chairPeriod <- function(yr) {
  ifelse(yr < 2006,'Greenspan',
         ifelse(yr < 2014,'Bernanke',
                ifelse(yr < 2018,'Yellen','Powell')))
}

# Assemble the player-level data frame for one side ('1' or '2') of each
# comparison from the correspondingly suffixed columns of `dat`.
playerFrame <- function(dat,side) {
  data.frame(fed = dat[[paste0('fed_',side)]],
             chamber = dat[[paste0('chamber_',side)]],
             year = dat[[paste0('year_',side)]],
             nchars = dat[[paste0('nchars_',side)]],
             period = dat[[paste0('period_',side)]])
}

# One row per distinct comparison profile, with the number of times
# conversation 1 / conversation 2 was picked in choice_1 / choice_2
# (0 where an option was never picked).
aggressive.sf <- res %>%
  as_tibble() %>%
  filter(cleaned != -1) %>%
  mutate(choice = cleaned,
         year_1 = lubridate::year(date_1),
         year_2 = lubridate::year(date_2),
         period_1 = chairPeriod(year_1),
         period_2 = chairPeriod(year_2)) %>%
  count(fed_1,fed_2,choice,direction,reversed,chamber_1,chamber_2,n_1,n_2,nchars_1,nchars_2,year_1,year_2,period_1,period_2) %>%
  pivot_wider(names_from = choice,values_from = n,
              names_prefix = 'choice_',values_fill = list(n = 0L))

# NOTE(review): this keeps the reversed-order 'more' prompts, whereas the
# table behind SI Figure 23 keeps the non-reversed ones -- confirm intended.
# 'Yellen' is the first factor level.
chairOrder <- c('Yellen','Greenspan','Bernanke','Powell')
aggressive.sf2 <- aggressive.sf %>%
  filter(direction == 'more',reversed) %>%
  mutate(fed_1 = factor(fed_1,levels = chairOrder),
         fed_2 = factor(fed_2,levels = chairOrder))

# BTm() takes each player as a data frame of player-level covariates; replace
# the fed_1 / fed_2 columns with those data frames.
aggressive.sf2$fed_1 <- playerFrame(aggressive.sf2,'1')
aggressive.sf2$fed_2 <- playerFrame(aggressive.sf2,'2')


aggressiveModel <- BTm(cbind(choice_1,choice_2),fed_1,fed_2,
                       formula = ~fed + chamber + factor(year) + log(nchars),
                       id = 'fed',data = aggressive.sf2)
# Explicit print() so the summaries reach the log under source() as well.
print(summary(aggressiveModel))


# Same model refitted with br = TRUE.
print(summary(update(aggressiveModel,br = TRUE)))

# Quasi-variances for the estimated chair abilities.
qv <- qvcalc(BTabilities(aggressiveModel))


# SI Figure 24: Bradley-Terry ability estimate per Fed chair, with thin
# +/-1.96 and thick +/-1.65 quasi-standard-error bars, ordered by estimate.
# The plot is built before the device is opened, so an error here cannot
# leave a dangling PDF device.
fig24 <- qv$qvframe %>%
  mutate(chair = row.names(.)) %>%
  as_tibble() %>%
  ggplot(aes(x = estimate,y = reorder(chair,estimate))) + 
  geom_point(size = 3) + 
  geom_errorbarh(aes(xmin = estimate - 1.96*quasiSE,xmax = estimate + 1.96*quasiSE),height = 0) + 
  geom_errorbarh(aes(xmin = estimate - 1.65*quasiSE,xmax = estimate + 1.65*quasiSE),height = 0,size = 1.2) + 
  geom_vline(xintercept = 0,linetype = 'dashed') + 
  theme_bw() + 
  labs(x = 'More aggressive conversations',y = 'Fed Chair',
       title = 'Bradley-Terry measure of aggression',
       subtitle = 'Estimated aggression relative to Yellen')

pdf('./output/figures/SI_figure_24.pdf',width = 7,height = 5)
# Explicit print(): under source() a top-level ggplot object is not
# auto-printed, and the PDF would be written without the plot.
print(fig24)
dev.off()

# EOF